
rm(list=ls())
options(scipen=100,digits=10)

library(rgdal)
library(gdalUtils)
library(raster)
library(dplyr)
library(sf)
library(tidyverse)
library(rgeos)
library(foreign)
library(tidyr)
library(geosphere)
library(lubridate)
library(lfe)
library(stargazer)
setwd("working directory")
load("Main_Data.Rdata")

#Table 2: Main Results
DiD_base_density=felm(AOD ~ window + prod +   Treatment_window_count_20km +  Treatment_prod_count_20km + Plug_Count_20KM
                      + PPT + TDMEAN + TMEAN + Wind_Speed | 
                        factor(OBJECTID)+factor(DATE) |0 |clust, data = data)
summary(DiD_base_density)

DiD_spatial_density=felm(AOD ~ window + prod + window_2 + window_5 + window_10
                               + prod_2 + prod_5 + prod_10  +  Treatment_window_count_20km +  Treatment_prod_count_20km + Plug_Count_20KM
                               + PPT + TDMEAN + TMEAN + Wind_Speed | 
                                 factor(OBJECTID)+factor(DATE) |0 |clust , data = data)
summary(DiD_spatial_density)

#Table 3: using treatment group only
treat_list=unique(data$OBJECTID[which(data$window==1|data$prod==1)])
data_treat=subset(data, OBJECTID%in%treat_list)

DiD_base_density_treat=felm(AOD ~ window + prod +  Treatment_window_count_20km +  Treatment_prod_count_20km + Plug_Count_20KM
                            + PPT + TDMEAN + TMEAN + Wind_Speed | 
                              factor(OBJECTID)+factor(DATE) |0 |clust, data = data_treat)
summary(DiD_base_density_treat)

DiD_spatial_density_treat=felm(AOD ~ window + prod + window_2 + window_5 + window_10
                               + prod_2 + prod_5 + prod_10  +  Treatment_window_count_20km +  Treatment_prod_count_20km + Plug_Count_20KM
                               + PPT + TDMEAN + TMEAN + Wind_Speed | 
                                 factor(OBJECTID)+factor(DATE) |0 |clust , data = data_treat)
summary(DiD_spatial_density_treat)

## Table 4: Matched Sample, column (1) and (2)
## Column (3) code is in a separate file: "PSM.R:

setwd("/Volumes/HL-WD/Fracking_Archive/Data")
# Matching based on distance and permit time (Month/Year)
well_info=read.csv("well_info.csv")
well_info=subset(well_info, OBJECTID!=348704&OBJECTID!=438502&OBJECTID!=486792)
well_info=subset(well_info,!(is.na(PERMIT_DATE)))
well_info$PERMIT_YEAR=year(well_info$PERMIT_DATE)
well_info$PERMIT_MONTH=month(well_info$PERMIT_DATE)
treat_list=unique(data$OBJECTID[which(data$window==1|data$prod==1)])
well_treat=subset(well_info, OBJECTID%in%treat_list)
well_control=subset(well_info, !(OBJECTID%in%treat_list))

treat_year=well_treat$PERMIT_YEAR
control_year=well_control$PERMIT_YEAR

Year_diff=outer(treat_year,control_year,'==')

Year_diff[which(Year_diff==FALSE)]=NA
Year_diff[which(Year_diff==TRUE)]=1

treat_month=well_treat$PERMIT_MONTH
control_month=well_control$PERMIT_MONTH

Month_diff=outer(treat_month,control_month,'==')

Month_diff[which(Month_diff==FALSE)]=NA
Month_diff[which(Month_diff==TRUE)]=1


well_treat=well_treat[c("OBJECTID","LONGITUDE","LATITUDE")]
well_control=well_control[c("OBJECTID","LONGITUDE","LATITUDE")]

coordinates(well_treat)=c("LONGITUDE","LATITUDE")
coordinates(well_control)=c("LONGITUDE","LATITUDE")
proj4string(well_treat)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84")
proj4string(well_control)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84")

well_treat=st_as_sf(well_treat, coords = c("X", "Y"))
well_control=st_as_sf(well_control, coords = c("X", "Y"))
DD=st_distance(well_treat,well_control)


DD1=DD*Year_diff*Month_diff

colnames(DD1)=well_control$OBJECTID
rownames(DD1)=well_treat$OBJECTID

DD1=DD1[rowSums(is.na(DD1)) != ncol(DD1), ]
## Find name of min element for each row
mins=apply(DD1, 1, which.min)
ii=as.numeric(mins)
min_well=colnames(DD1[,ii])

match=as.data.frame(rownames(DD1))
match$Control=as.numeric(min_well)
names(match)[1]="Treatment"

data_match=subset(data, OBJECTID%in%match$Treatment|OBJECTID%in%match$Control)

DiD_match_density_1=felm(AOD ~ window + prod + Treatment_window_count_20km +  Treatment_prod_count_20km + Plug_Count_20KM
                         + PPT + TDMEAN + TMEAN + Wind_Speed | 
                           factor(OBJECTID)+factor(DATE)|0|clust, data = data_match)
summary(DiD_match_density_1)

DiD_Spatial_match_density_1=felm(AOD ~ window + prod + window_2 + window_5 + window_10
                         + prod_2 + prod_5   + prod_10 + Treatment_window_count_20km +  Treatment_prod_count_20km + Plug_Count_20KM
                         + PPT + TDMEAN + TMEAN + Wind_Speed | 
                           factor(OBJECTID)+factor(DATE)|0|clust, data = data_match)
summary(DiD_Spatial_match_density_1)

# Matching based on distance and permit time and county
well_info=read.csv("well_info.csv")
well_info=subset(well_info, OBJECTID!=348704&OBJECTID!=438502&OBJECTID!=486792)
well_info=subset(well_info,!(is.na(PERMIT_DATE)))
well_info$PERMIT_YEAR=year(well_info$PERMIT_DATE)
well_info$PERMIT_MONTH=month(well_info$PERMIT_DATE)
treat_list=unique(data$OBJECTID[which(data$window==1|data$prod==1)])
well_treat=subset(well_info, OBJECTID%in%treat_list)
well_control=subset(well_info, !(OBJECTID%in%treat_list))

treat_year=well_treat$COUNTY
control_year=well_control$COUNTY

County_diff=outer(treat_year,control_year,'==')

County_diff[which(County_diff==FALSE)]=NA
County_diff[which(County_diff==TRUE)]=1

DD1=DD*Year_diff*County_diff

colnames(DD1)=well_control$OBJECTID
rownames(DD1)=well_treat$OBJECTID

DD1=DD1[rowSums(is.na(DD1)) != ncol(DD1), ]
## Find name of min element for each row
mins=apply(DD1, 1, which.min)
ii=as.numeric(mins)
min_well=colnames(DD1[,ii])

match=as.data.frame(rownames(DD1))
match$Control=as.numeric(min_well)
names(match)[1]="Treatment"

data_match=subset(data, OBJECTID%in%match$Treatment|OBJECTID%in%match$Control)

DiD_match_density_2=felm(AOD ~ window + prod +  Treatment_window_count_20km +  Treatment_prod_count_20km + Plug_Count_20KM
                         + PPT + TDMEAN + TMEAN + Wind_Speed | 
                           factor(OBJECTID)+factor(DATE)|0|clust, data = data_match)
summary(DiD_match_density_2)


DiD_Spatial_match_density_2=felm(AOD ~ window + prod + window_2 + window_5 + window_10
                         + prod_2 + prod_5 + prod_10 + Treatment_window_count_20km +  Treatment_prod_count_20km + Plug_Count_20KM
                         + PPT + TDMEAN + TMEAN + Wind_Speed | 
                           factor(OBJECTID)+factor(DATE)|0|clust, data = data_match)
summary(DiD_Spatial_match_density_2)

## Table 5: window not clean
data$window_Extend=0
data$window_Extend[which(data$DATE>=data$C_Start-30&data$DATE<=data$C_End)]=1
DiD_spatial1=felm(AOD ~ window_Extend + prod  + window_2 + window_5 + window_10
                  + prod_2 + prod_5 + prod_10 +  Treatment_window_count_20km +  Treatment_prod_count_20km + Plug_Count_20KM
                  + PPT + TDMEAN + TMEAN + Wind_Speed | 
                    factor(OBJECTID)+factor(DATE) |0 |clust , data = data)
summary(DiD_spatial1)

data$window_Extend=0
data$window_Extend[which(data$DATE>=data$C_Start-360&data$DATE<=data$C_End)]=1
DiD_spatial2=felm(AOD ~ window_Extend + prod  + window_2 + window_5 + window_10
                  + prod_2 + prod_5 + prod_10 +  Treatment_window_count_20km +  Treatment_prod_count_20km + Plug_Count_20KM
                  + PPT + TDMEAN + TMEAN + Wind_Speed | 
                    factor(OBJECTID)+factor(DATE) |0 |clust , data = data)
summary(DiD_spatial2)

## Table 6: Falsification Test

data$window=0
data$window[which(data$DATE>=data$C_Start&data$DATE<=data$C_End)]=1
data$prod=0
data$prod[which(data$DATE>=data$P_Start & data$DATE<=data$P_End)]=1
data1=subset(data, window!=1&prod!=1)
data1$fake1=0
data1$fake2=0
data1$fake1[which(data1$DATE<data1$C_Start-720&data1$DATE>=data1$C_Start-1080)]=1
data1$fake2[which(data1$DATE<data1$C_Start-1080&data1$DATE>=data1$C_Start-1440)]=1

DiD_base1=felm(AOD ~ fake1  +  Treatment_window_count_20km +  Treatment_prod_count_20km + Plug_Count_20KM
               + PPT + TDMEAN + TMEAN + Wind_Speed | 
                 factor(OBJECTID)+factor(DATE) |0 |clust , data = data1)
summary(DiD_base1)

DiD_spatial1=felm(AOD ~ fake1 + window_2 + window_5 + window_10
                  + prod_2 + prod_5 + prod_10 + Treatment_window_count_20km +  Treatment_prod_count_20km + Plug_Count_20KM
                  + PPT + TDMEAN + TMEAN + Wind_Speed | 
                    factor(OBJECTID)+factor(DATE) |0 |clust , data = data1)
summary(DiD_spatial1)

DiD_base2=felm(AOD ~ fake2  +  Treatment_window_count_20km +  Treatment_prod_count_20km + Plug_Count_20KM
               + PPT + TDMEAN + TMEAN + Wind_Speed | 
                 factor(OBJECTID)+factor(DATE) |0 |clust , data = data1)
summary(DiD_base2)

DiD_spatial2=felm(AOD ~ fake2 + window_2 + window_5 + window_10
                  + prod_2 + prod_5 + prod_10 + Treatment_window_count_20km +  Treatment_prod_count_20km + Plug_Count_20KM
                  + PPT + TDMEAN + TMEAN + Wind_Speed | 
                    factor(OBJECTID)+factor(DATE) |0 |clust , data = data1)
summary(DiD_spatial2)

